In [1]:
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd

print('all libs loaded')
all libs loaded
In [2]:
# Load the gas-price table (a Year column followed by one price column per country)
gas = pd.read_csv(filepath_or_buffer='gas_prices.csv')
gas
Out[2]:
Year Australia Canada France Germany Italy Japan Mexico South Korea UK USA
0 1990 NaN 1.87 3.63 2.65 4.59 3.16 1.00 2.05 2.82 1.16
1 1991 1.96 1.92 3.45 2.90 4.50 3.46 1.30 2.49 3.01 1.14
2 1992 1.89 1.73 3.56 3.27 4.53 3.58 1.50 2.65 3.06 1.13
3 1993 1.73 1.57 3.41 3.07 3.68 4.16 1.56 2.88 2.84 1.11
4 1994 1.84 1.45 3.59 3.52 3.70 4.36 1.48 2.87 2.99 1.11
5 1995 1.95 1.53 4.26 3.96 4.00 4.43 1.11 2.94 3.21 1.15
6 1996 2.12 1.61 4.41 3.94 4.39 3.64 1.25 3.18 3.34 1.23
7 1997 2.05 1.62 4.00 3.53 4.07 3.26 1.47 3.34 3.83 1.23
8 1998 1.63 1.38 3.87 3.34 3.84 2.82 1.49 3.04 4.06 1.06
9 1999 1.72 1.52 3.85 3.42 3.87 3.27 1.79 3.80 4.29 1.17
10 2000 1.94 1.86 3.80 3.45 3.77 3.65 2.01 4.18 4.58 1.51
11 2001 1.71 1.72 3.51 3.40 3.57 3.27 2.20 3.76 4.13 1.46
12 2002 1.76 1.69 3.62 3.67 3.74 3.15 2.24 3.84 4.16 1.36
13 2003 2.19 1.99 4.35 4.59 4.53 3.47 2.04 4.11 4.70 1.59
14 2004 2.72 2.37 4.99 5.24 5.29 3.93 2.03 4.51 5.56 1.88
15 2005 3.23 2.89 5.46 5.66 5.74 4.28 2.22 5.28 5.97 2.30
16 2006 3.54 3.26 5.88 6.03 6.10 4.47 2.31 5.92 6.36 2.59
17 2007 3.85 3.59 6.60 6.88 6.73 4.49 2.40 6.21 7.13 2.80
18 2008 4.45 4.08 7.51 7.75 7.63 5.74 2.45 5.83 7.42 3.27

Our First Line Chart

In [3]:
# Sample points for the straight line y = 2x
x = [0, 1, 2, 3, 4]
y = [2 * value for value in x]

# Half-step grid from 0 to 4 (inclusive) used for the quadratic curve
x2 = np.arange(0, 4.5, 0.5)

# figsize is in inches and dpi is pixels per inch (use a higher dpi, e.g. 300, when saving)
fig, ax = plt.subplots(figsize=(8, 5), dpi=100)

# Line 1: blue, triangle markers, dashed.
# The same styling can be written with the shorthand format string 'b^--',
# and further tuned with keywords such as linewidth, markersize, markeredgecolor.
ax.plot(x, y, color='b', marker='^', linestyle='--', label='2x')

# Line 2: the first six grid points are drawn solid, the rest dashed.
# The two slices share the point at index 5 so the segments join up.
solid_part = x2[:6]
dashed_part = x2[5:]
ax.plot(solid_part, solid_part ** 2, color='r', label='X^2')
ax.plot(dashed_part, dashed_part ** 2, color='r', linestyle='--')

# Title (font settings are passed through fontdict)
ax.set_title('Our First Graph!', fontdict={'fontname': 'Comic Sans MS', 'fontsize': 20})

# Axis labels
ax.set_xlabel('X Axis')
ax.set_ylabel('Y Axis')

# Tick marks: only the x axis is pinned, the y axis keeps its automatic ticks
ax.set_xticks([0, 1, 2, 3, 4])

# Legend built from the label= arguments above
ax.legend()

# Write the figure to disk at print resolution before displaying it
fig.savefig('mygraph.png', dpi=300)

plt.show()

Our First Bar Chart

In [4]:
# Category names and bar heights for the demo chart
labels = ['A', 'B', 'C']
values = [1, 4, 2]

plt.figure(figsize=(5, 3), dpi=100)

# plt.bar returns a container with one rectangle per bar, in input order
bars = plt.bar(labels, values)

# Give each bar its own hatch pattern. zip() pairs bars with patterns without
# mutating the list, and simply stops at the shorter sequence, so a chart with
# more bars than patterns leaves the extra bars unhatched instead of raising.
patterns = ['/', 'O', '*']
for bar, pattern in zip(bars, patterns):
    bar.set_hatch(pattern)

# Save at print resolution before showing the figure
plt.savefig('barchart.png', dpi=300)

plt.show()

Charting Gas Data Over Time

In [6]:
# Canvas for the multi-country comparison
plt.figure(figsize=(9, 7))

# Title (font settings are passed through fontdict)
plt.title('Gas Over Time', fontdict={'fontname': 'Times', 'fontsize': 20})

# Every column except 'Year' is a price series; draw one marked line per column.
# To chart only a few countries, restrict this list instead,
# e.g. to ['Australia', 'USA', 'Canada', 'South Korea'].
price_columns = [column for column in gas if column != 'Year']
for country in price_columns:
    plt.plot(gas['Year'], gas[country], marker='.', label=country)

# X ticks: every third year present in the data, plus an extra tick at 2011
year_ticks = gas['Year'].iloc[::3]
plt.xticks([*year_ticks.tolist(), 2011])

# Axis labels
plt.xlabel('Year')
plt.ylabel('US Dollars')

# Uncomment to write the figure to disk
#plt.savefig('Gas Price Fig', dpi=300)

# Legend built from the label= arguments above
plt.legend()
plt.show()

Load FIFA Data

In [7]:
# Read the FIFA player table and preview its first five rows (head() defaults to 5)
fifa = pd.read_csv(filepath_or_buffer='fifa_data.csv')

fifa.head()
Out[7]:
Unnamed: 0 ID Name Age Photo Nationality Flag Overall Potential Club ... Composure Marking StandingTackle SlidingTackle GKDiving GKHandling GKKicking GKPositioning GKReflexes Release Clause
0 0 158023 L. Messi 31 https://cdn.sofifa.org/players/4/19/158023.png Argentina https://cdn.sofifa.org/flags/52.png 94 94 FC Barcelona ... 96.0 33.0 28.0 26.0 6.0 11.0 15.0 14.0 8.0 €226.5M
1 1 20801 Cristiano Ronaldo 33 https://cdn.sofifa.org/players/4/19/20801.png Portugal https://cdn.sofifa.org/flags/38.png 94 94 Juventus ... 95.0 28.0 31.0 23.0 7.0 11.0 15.0 14.0 11.0 €127.1M
2 2 190871 Neymar Jr 26 https://cdn.sofifa.org/players/4/19/190871.png Brazil https://cdn.sofifa.org/flags/54.png 92 93 Paris Saint-Germain ... 94.0 27.0 24.0 33.0 9.0 9.0 15.0 15.0 11.0 €228.1M
3 3 193080 De Gea 27 https://cdn.sofifa.org/players/4/19/193080.png Spain https://cdn.sofifa.org/flags/45.png 91 93 Manchester United ... 68.0 15.0 21.0 13.0 90.0 85.0 87.0 88.0 94.0 €138.6M
4 4 192985 K. De Bruyne 27 https://cdn.sofifa.org/players/4/19/192985.png Belgium https://cdn.sofifa.org/flags/7.png 91 92 Manchester City ... 88.0 68.0 58.0 51.0 15.0 13.0 5.0 10.0 13.0 €196.4M

5 rows × 89 columns

Plot Histograms

In [13]:
# Bucket edges for the ratings: 40 up to 100 in steps of 10
bins = list(range(40, 101, 10))

plt.figure(figsize=(8, 5))

# One bar per bucket, counting players by their Overall rating
plt.hist(fifa['Overall'], bins=bins, color='#abcdef')

# Put the x ticks exactly on the bucket edges
plt.xticks(bins)

plt.title('Distribution of Player Skills in FIFA 2018')
plt.xlabel('Skill Level')
plt.ylabel('Number of Players')

# Uncomment to write the figure to disk
# plt.savefig('histogram.png', dpi=300)

plt.show()

Pie Chart

In [22]:
#bring in data from certain rows
left = fifa.loc[fifa['Preferred Foot'] == 'Left'].count()[0]
right = fifa.loc[fifa['Preferred Foot'] == 'Right'].count()[0]

#set up chart values 
plt.figure(figsize=(8,5))
labels = ['Left', 'Right']
colors = ['#abcdef', '#aabbcc']

#build out the chart
plt.pie([left, right], labels = labels, colors=colors, autopct='%.2f %%')
plt.title('Foot Preference of FIFA Players')

#show chart
plt.show()

Pie Chart #2

In [38]:
#get player weights
fifa.Weight = [int(x.strip('lbs')) if type(x)==str else x for x in fifa.Weight]

#break down the weight classes
light = fifa.loc[fifa.Weight < 125].count()[0]
light_medium = fifa[(fifa.Weight >= 125) & (fifa.Weight < 150)].count()[0]
medium = fifa[(fifa.Weight >= 150) & (fifa.Weight < 175)].count()[0]
medium_heavy = fifa[(fifa.Weight >= 175) & (fifa.Weight < 200)].count()[0]
heavy = fifa[fifa.Weight >= 200].count()[0]

#prepare data for charting
weights = [light,light_medium, medium, medium_heavy, heavy]
label = ['under 125', '125-150', '150-175', '175-200', 'over 200']
explode = (.4,.2,0,0,.4)

#set chart style
plt.style.use('ggplot')
plt.figure(figsize=(8,5), dpi=100)

#show the charting data 
plt.pie(weights, labels=label, pctdistance=0.8, autopct='%.1f %%', explode =explode)
plt.title('Weight Distribution of FIFA Players')
plt.show()

Box and Whiskers Chart

In [62]:
# Overall ratings of the players at each of the three clubs being compared.
# A single .loc[rows, column] lookup replaces the chained .loc[rows][column].
barcelona = fifa.loc[fifa['Club'] == "FC Barcelona", 'Overall']
madrid = fifa.loc[fifa['Club'] == "Real Madrid", 'Overall']
revs = fifa.loc[fifa['Club'] == "New England Revolution", 'Overall']

# Reset to the default style BEFORE creating the figure, so that a style set
# earlier in the session cannot affect how the figure itself is created.
plt.style.use('default')
plt.figure(figsize=(7,8), dpi=100)

labels = ['FC Barcelona', 'Real Madrid', 'NE Revolution']

# patch_artist=True makes the boxes fillable patches; medianprops thickens the median line
bp = plt.boxplot([barcelona, madrid, revs],
                 patch_artist=True, medianprops={'linewidth': 2})

# Name the boxes through the x ticks (boxes sit at positions 1..N by default).
# boxplot's own `labels` keyword was renamed to `tick_labels` in Matplotlib 3.9,
# whereas setting the ticks explicitly works on every version.
plt.xticks(range(1, len(labels) + 1), labels)

plt.title('Professional Soccer Team Comparison')
plt.ylabel('FIFA Overall Rating')

# Style every box: blue outline, light grey fill, diagonal hatch
for box in bp['boxes']:
    box.set(edgecolor='#4286f4', facecolor='#e0e0e0', linewidth=2, hatch='/')

plt.show()